### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
##  This program gathers the data needed for the figures for         ##
##  the business cycle paper.                                        ##
##                                                                   ##
## Author: Sarah Friedman                                            ##
## Date: April 2021                                                  ##
## Update: NA                                                        ##
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###

# Files required to run this script ---------------------------------------

# dfa_networth_bus_cycle2_shares.csv
# dfa_networth_bus_cycle2_levels.csv
# dfa_networth_bus_cycle2_elasticity_df.csv (output of dfa_elasticities.R script)
# dfa_networth_bus_cycle2_portfolio_returns.csv (output of dfa_portfolio_returns.R script)
# dfa_networth_kalman_bus_cycle2_shares.csv
# dfa_networth_kalman_bus_cycle2_levels.csv
# dfa_networth_kalman_bus_cycle2_delta_se.csv (output of standard_errors_kalman.R)
# dfa_networth_bus_cycle2_delta_se.csv (output of standard_errors.R)
# dfa_networth_fernandez_bus_cycle2_shares.csv
# dfa_networth_litterman_bus_cycle2_shares.csv
# dfa_networth_2009q1_bus_cycle2_levels.csv
# dfa_networth_shares.csv
# out_of_sample_bus_cycle_v3.xlsx (contains SCF panel data)



# Preliminaries -----------------------------------------------------------

output_directory <- file.path(home_directory, "outputs")

setwd(output_directory)

groupnames <- dfa_splits(group = "networth", agg_level = "bus_cycle2")$groupnames

# Helper functions --------------------------------------------------------

dfa_pivot_wider <- function(df, col) {
  d <- df %>% 
    select(date, cat, any_of(col)) %>%
    pivot_wider(id_cols = "date", names_from = "cat", values_from = all_of(col)) %>%
    select(date, all_of(rev(groupnames)))
}


balancesheeter_buscycle <- function(df, date) {
  date_input = date
  df_balance <- df %>% 
    mutate(morts = -morts, 
           other_liab = -liab - morts, 
           other_fin = fin - corp_equ_mfs - non_corp_eq, 
           non_fin = non_fin,
           bus_eq = corp_equ_mfs + non_corp_eq) %>%
    select(c(date,cat,bus_eq,other_fin,non_fin,morts,other_liab,networth)) %>%
    dplyr::group_by(cat) %>%
    tidyr::gather(key = class, value = value, -c(date,cat)) %>%
    arrange(date) %>%
    subset(date == date_input)
  return(df_balance)
}

# four quarter moving average
fourqma <- function(x) {
  ma <- (x + dplyr::lag(x, n=1) + dplyr::lag(x,n=2) + dplyr::lag(x,n=3)) / 4
  return(ma)
}

rplot.bar.data <- function(vec, start.date =  c(2007,1)) {
  ll <- list(annual = NULL, semi = NULL, 
             qtrly = tis(vec, start = start.date, tif = "quarterly"),
             monthly = NULL, daily = NULL)
}


# Baseline data -----------------------------------------------------------

dfa_levels_bynw <- read_dfa(.item = "levels", .group = "networth", .agg_level = "bus_cycle2", .method = "fernandez", path = output_directory) %>%
  mutate_if(is.numeric, ~./1e12) %>% 
  mutate(date = as.Date(date, frac=1)) 

dfa_shares_bynw <- read_dfa(.item = "shares", .group = "networth", .agg_level = "bus_cycle2", .method = "fernandez", path = output_directory) %>%
  mutate_if(is.numeric, ~.*100) %>% 
  mutate(date = as.Date(date, frac=1)) 

scf_levels <- read_dfa(.item = "scf_levels", .group = "networth", .agg_level = "bus_cycle2", path = output_directory) %>%
  mutate(date = as.Date(date, frac=1)) %>%
  mutate(non_fin = real_estate+con_durables, 
         debt_sec=us_gov_muni+corp_for_bonds, 
         ass_loans=oth_loans+morts_owed, 
         liab_loans = morts+cons_cred+dep_loans+oth_loans_adv) %>% 
  mutate(fin = check_curr+time_dep+mmfs+debt_sec+ass_loans+corp_equ_mfs+life_ins+pension+non_corp_eq+misc_assets) %>% 
  mutate(assets = non_fin+fin, 
         liab = liab_loans+unpaid_life_ins) %>% 
  mutate(networth = assets - liab) %>% 
  select(date, cat, assets, non_fin, real_estate, con_durables, fin, check_curr, time_dep, mmfs, debt_sec, us_gov_muni, 
         corp_for_bonds, ass_loans, oth_loans, morts_owed, corp_equ_mfs, life_ins, pension, non_corp_eq, misc_assets, 
         liab, liab_loans, morts, cons_cred, dep_loans, oth_loans_adv, unpaid_life_ins, networth) %>%
  arrange(date,cat)

ngroups <- length(unique(dfa_levels_bynw$cat))


nw_shares_bynw <- dfa_pivot_wider(dfa_shares_bynw, "networth")

nw_levels_bynw <- dfa_pivot_wider(dfa_levels_bynw, "networth")

assets_levels_bynw <- dfa_pivot_wider(dfa_levels_bynw, "assets")

assets_shares_bynw <- dfa_pivot_wider(dfa_shares_bynw, "assets")

liab_levels_bynw <- dfa_pivot_wider(dfa_levels_bynw, "liab")

liab_shares_bynw <- dfa_pivot_wider(dfa_shares_bynw, "liab")


# Standard errors ---------------------------------------------------------

fernandez_nw_shares_bynw <- read_dfa(.item = "shares", .group = "networth", .agg_level = "bus_cycle2", .method = "fernandez",
                               path = output_directory) %>%
  select(date, cat, networth) %>%
  dplyr::rename(fernandez_share = networth) %>%
  mutate_if(is.numeric, ~.*100)

fernandez_nw_shares_se_bynw <- read_dfa(.item = "delta_se", .group = "networth", .agg_level = "bus_cycle2", .method = "fernandez", path = output_directory) %>%
  mutate_if(is.numeric, ~.*100) %>% 
  dplyr::rename(fernandez_share_se = se)

fernandez_nw_shares_withse <- left_join(fernandez_nw_shares_bynw, fernandez_nw_shares_se_bynw, by = c("date", "cat")) %>%
  dplyr::mutate(fernandez_share_upper_se_band = fernandez_share + 1.96 * fernandez_share_se, 
                fernandez_share_lower_se_band = fernandez_share - 1.96 * fernandez_share_se) %>%
  select(-fernandez_share_se) %>%
  pivot_wider(id_cols = date, names_from = cat, values_from = c(fernandez_share, fernandez_share_upper_se_band, fernandez_share_lower_se_band), names_sep = "_") %>%
  dplyr::mutate(date = as.Date(date, frac=1)) %>% 
  select(date, contains(groupnames)) 

fernandez_nw_shares_withse_top <- fernandez_nw_shares_withse %>% 
  select(date, contains(groupnames[1:3]))

fernandez_nw_shares_withse_bottom <- fernandez_nw_shares_withse %>% 
  select(date, contains(groupnames[4:6]))


# Kalman data -------------------------------------------------------------

kal_nw_shares_bynw <- read_dfa(.item = "shares", .group = "networth", .agg_level = "bus_cycle2", .method = "kalman",
                               path = output_directory) %>%
  select(date, cat, networth) %>%
  dplyr::rename(kal_share = networth) %>%
  mutate_if(is.numeric, ~.*100)

kal_nw_shares_se_bynw <- read_dfa(.item = "delta_se", .group = "networth", .agg_level = "bus_cycle2", .method = "kalman", path = file.path(output_directory)) %>%
  mutate_if(is.numeric, ~.*100) %>% 
  dplyr::rename(kal_share_se = se)

kal_nw_shares_comp_withse <- left_join(kal_nw_shares_bynw, kal_nw_shares_se_bynw, by = c("date", "cat")) %>%
  mutate(kal_share_upper_se_band = kal_share + 1.96 * kal_share_se, kal_share_lower_se_band = kal_share - 1.96 * kal_share_se) %>%
  select(-kal_share_se) %>%
  pivot_wider(id_cols = date, names_from = cat, values_from = c(kal_share, kal_share_upper_se_band, kal_share_lower_se_band), names_sep = "_") %>%
  mutate(date = as.Date(date, frac=1)) %>%
  left_join(nw_shares_bynw %>% dplyr::rename_at(vars(-date), function(n) paste0("dfa_share_", n)), by = "date") %>% 
  select(date, contains(groupnames)) 

kal_nw_shares_comp_withse_top <- kal_nw_shares_comp_withse %>% 
  select(date, contains(groupnames[1:3]))

kal_nw_shares_comp_withse_bottom <- kal_nw_shares_comp_withse %>% 
  select(date, contains(groupnames[4:6]))

# Out of sample data ------------------------------------------------------

balance_sheet_2007q3 <- balancesheeter_buscycle(df = dfa_levels_bynw, date = as.Date(as.yearqtr("2007q3"), frac=1)) %>%
  mutate(date = "dfa_2007q3")
balance_sheet_2009_full <- balancesheeter_buscycle(df = dfa_levels_bynw, date = as.Date(as.yearqtr("2009q3"), frac=1)) %>%
  mutate(date = "dfa_2009q3")

scf_panel <- read_excel(file.path(output_directory, "out_of_sample_bus_cycle_v4.xlsx"), range = "A1:F7") %>%
  dplyr::rename_all(str_trim) %>%
  mutate_all(str_trim) %>%
  mutate_at(vars(-dfacat), as.numeric) %>%
  mutate(morts = -morts, other_liab = -other_liab,
         networth = bus_eq + other_fin + non_fin + morts + other_liab) %>%
  pivot_longer(-dfacat, names_to = "class", values_to = "value") %>%
  dplyr::rename("cat" = dfacat) %>%
  mutate(date = "scf_panel") %>%
  select(date, cat, class, value) %>%
  mutate(value = value/1e12) 

out_of_sample <- bind_rows(list(balance_sheet_2007q3, balance_sheet_2009_full, scf_panel)) %>%
  mutate(class = factor(class, levels = c("other_fin", "non_fin", "bus_eq", "other_liab", "morts", "networth"))) %>%
  arrange(date, class)
out_of_sample <- split(out_of_sample, f = out_of_sample$cat)
out_of_sample <- lapply(out_of_sample, function(df) 
  df %>% select(date, class, value) %>%
    ungroup() %>%
    pivot_wider(id_cols = class, names_from = date)
)


# Buseq and real estate as share of total wealth -----------------------------------

buseq_realestate_levels_bynw <- dfa_levels_bynw %>% 
  dplyr::transmute(date = date,
            cat = cat,
            buseq = corp_equ_mfs + non_corp_eq,
            realestate = real_estate - morts,
            networth = networth)

buseq_realestate_shareofnw <- buseq_realestate_levels_bynw %>% 
  dplyr::group_by(date) %>%
  dplyr::summarise_if(is.numeric, sum) %>%
  dplyr::mutate_at(vars(buseq, realestate), ~./networth * 100)

buseq_shareofnw <- buseq_realestate_shareofnw %>%
  select(date, buseq) 

realestate_shareofnw <- buseq_realestate_shareofnw %>%
  select(date, realestate)   


# Top 1% share of business equity ----------------------------------------

top1_shareofbuseq <- buseq_realestate_levels_bynw %>% 
  select(date, cat, buseq) %>% 
  pivot_wider(names_from = cat, values_from = buseq) %>% 
  dplyr::transmute(date = date,
            top1_shareofbuseq = 100 * (TopPt1 + pct99to999) / (TopPt1 + pct99to999 + pct90to99 + pct70to90 + pct50to70 + Bottom50)) 


# 50 - 90% share of real estate -------------------------------------------

pct50to90_shareofrealestate <- buseq_realestate_levels_bynw %>% 
  select(date, cat, realestate) %>% 
  pivot_wider(names_from = cat, values_from = realestate) %>% 
  dplyr::transmute(date = date,
            pct50to90_shareofrealestate = 100 * (pct50to70 + pct70to90) / (TopPt1 + pct99to999 + pct90to99 + pct70to90 + pct50to70 + Bottom50)) 


# Bottom 50% balance sheet ------------------------------------------------

bottom50_bs <- dfa_levels_bynw %>%
  mutate(oth_assets = assets - real_estate, 
         oth_liab = -(liab - morts), 
         # oth_liab = -oth_liab, 
         morts = -morts, 
         zero = 0) %>%
  filter(cat == "Bottom50") %>%
  select(date,  morts, oth_liab, zero, oth_assets, real_estate, networth) %>%
  mutate(date = as.Date(date, frac=1))

bottom50_nw <- select(bottom50_bs, date, networth)
bottom50_assets <- select(bottom50_bs, date, real_estate, oth_assets)
bottom50_liab <- select(bottom50_bs, date, oth_liab, morts)


# Cyclical variation in wealth --------------------------------------------

dfa_share_change <- dfa_shares_bynw %>%
  select(date, cat, networth) %>%
  dplyr::group_by(cat) %>%
  dplyr::mutate(pp_change_share = networth - dplyr::lag(networth, order_by = date)) %>%
  # 4-quarter moving average
  dplyr::mutate(pp_change_share = (pp_change_share + dplyr::lag(pp_change_share) + dplyr::lag(pp_change_share, 2) + dplyr::lag(pp_change_share, 3))/4) %>%
  ungroup() %>%
  select(date, cat, pp_change_share) %>% 
  pivot_wider(id_cols = date, names_from = cat, values_from = pp_change_share) %>% 
  dplyr::mutate(date = as.Date(date, frac = 1))

# Decomposition -----------------------------------------------------------

dfa_portfolio_returns <- read_csv_nomsg(file = file.path(output_directory, "dfa_portfolio_returns.csv")) %>%
  mutate(date = as.Date(as.yearqtr(date), frac=1))

decomp <- left_join(dfa_levels_bynw %>% select(date, cat, networth), dfa_portfolio_returns, by = c("date", "cat")) %>%
  dplyr::group_by(date) %>% 
  dplyr::mutate(networth_p = networth,
         networth = sum(networth_p)) %>%
  ungroup() %>% 
  dplyr::group_by(cat) %>% 
  dplyr::mutate(networth_share = networth_p / networth,
         E_p = r_p,
         U_p = networth_p / dplyr::lag(networth_p) - 1 - E_p,
         # nw_change = networth_share - lag(networth_share),
         # U_p2 = nw_change * (networth / lag(networth_p)) + (networth / lag(networth) - 1) - E_p, 
         # U_p = U_p1,
         nw_change = networth_p / dplyr::lag(networth_p) - 1
         # nw_change = 100 * nw_change
  ) %>% 
  drop_na() %>%
  select(date, cat, E_p, U_p, nw_change, networth_p) 

decomp_ma <- decomp %>% 
  dplyr::mutate_at(vars(-date, -cat), fourqma) %>% 
  drop_na()


# Ratios ------------------------------------------------------------------

dfa_ratios <- dfa_levels_bynw %>%
  mutate(leverage = liab/assets,
         liq_assets = check_curr + time_dep + mmfs + us_gov_muni + corp_for_bonds + oth_loans + morts_owed + corp_equ_mfs,
         liquidity = liq_assets / networth,
         housing = real_estate/networth,
         housing_no_morts = (real_estate - morts_owed) / networth,
         nnp = (check_curr + time_dep + debt_sec - liab) / networth) %>%
  select(date, cat, leverage, liquidity, housing, housing_no_morts, nnp) %>%
  mutate(date = as.Date(date, frac=1)) %>% 
  pivot_longer(c(-date, -cat), names_to = "ratio")

dfa_ratios_list <- split(dfa_ratios, f = dfa_ratios$ratio) %>% 
  lapply(function(x) x %>% 
           select(-ratio) %>%
           pivot_wider(names_from = cat, values_from = value)  )

dfa_ratios_gr_list <- dfa_ratios_list %>% 
  lapply(function(df) df %>% 
           filter(date >= as.Date("2007-01-31") & date <= as.Date("2011-01-31")) )

dfa_ratios_covid_list <- dfa_ratios_list %>% 
  lapply(function(df) df %>% 
           filter(date >= as.Date("2017-01-31")) )


# Balance sheets during GR ------------------------------------------------

recession_bs <- list()
for (date in as.Date(as.yearqtr(seq(2007,2011,.25)), frac=1)) {
  recession_bs[[as.character(date)]] <- balancesheeter_buscycle(dfa_levels_bynw, date)
}
recession_bs <- bind_rows(recession_bs) %>%
  pivot_wider(id_cols = c(date,cat), names_from = class)

recession_bs <- split(recession_bs, f = recession_bs$cat) 
recession_bs <- lapply(recession_bs, function(df) df %>% ungroup %>% 
                         select(-cat)) %>% 
  lapply(function(df) list(
    other_fin = rplot.bar.data(df[["other_fin"]]),
    non_fin = rplot.bar.data(df[["non_fin"]]),
    bus_eq = rplot.bar.data(df[["bus_eq"]]),
    other_liab = rplot.bar.data(df[["other_liab"]]),
    morts = rplot.bar.data(df[["morts"]]),
    networth = df[["networth"]]
  ))

# NORMALIZE BY DIVIDING BY TOTAL ASSETS 

recession_bs_norm <- recession_bs
for (gr in names(recession_bs)) {
  for (line in names(recession_bs[[1]])[1:5]) {
    recession_bs_norm[[gr]][[line]][["qtrly"]] <- recession_bs[[gr]][[line]][["qtrly"]] / (
      recession_bs[[gr]][["bus_eq"]][["qtrly"]] + 
        recession_bs[[gr]][["other_fin"]][["qtrly"]] + 
        recession_bs[[gr]][["non_fin"]][["qtrly"]])
  }
}


# Balance sheets during COVID-19 crisis -----------------------------------

balancesheeter_covid <- function(df, date) {
  date_input = date
  df_balance <- df %>% 
    mutate(morts = -morts, 
           other_liab = -liab - morts, 
           non_fin = non_fin,
           #other_fin = fin - corp_equ_mfs - non_corp_eq - debt_sec - check_curr - time_dep - mmfs, 
           other_fin = fin - corp_equ_mfs - non_corp_eq, 
           debt_sec = debt_sec,
           #liquid_assets = check_curr + time_dep + mmfs,
           bus_eq = corp_equ_mfs + non_corp_eq) %>%
    select(c(date,cat,bus_eq,other_fin,non_fin,morts,other_liab,networth)) %>%
    dplyr::group_by(cat) %>%
    pivot_longer(-c(date,cat), names_to = "class") %>% 
    arrange(date) %>%
    filter(date == date_input)
  return(df_balance)
}


covid_bs <- list()
for (date in as.Date(as.yearqtr(seq(2017,2020.75,.25)), frac=1)) {
  covid_bs[[as.character(date)]] <- balancesheeter_covid(dfa_levels_bynw, date)
}
covid_bs <- bind_rows(covid_bs) %>%
  pivot_wider(id_cols = c(date,cat), names_from = class)

covid_bs <- split(covid_bs, f = covid_bs$cat) 
covid_bs <- lapply(covid_bs, function(df) df %>% ungroup %>% 
                         select(-cat)) %>% 
  lapply(function(df) list(
    non_fin = rplot.bar.data(df[["non_fin"]], start.date = c(2017,1)),
    other_fin = rplot.bar.data(df[["other_fin"]], start.date = c(2017,1)),
    #debt_sec = rplot.bar.data(df[["debt_sec"]], start.date = c(2017,1)),
    #liquid_assets = rplot.bar.data(df[["liquid_assets"]], start.date = c(2017,1)),
    bus_eq = rplot.bar.data(df[["bus_eq"]], start.date = c(2017,1)),
    other_liab = rplot.bar.data(df[["other_liab"]], start.date = c(2017,1)),
    morts = rplot.bar.data(df[["morts"]], start.date = c(2017,1)),
    networth = df[["networth"]]
  ))

# NORMALIZE BY DIVIDING BY TOTAL ASSETS 

covid_bs_norm <- covid_bs
for (gr in names(covid_bs)) {
  for (line in names(covid_bs[[1]])[1:5]) {
    covid_bs_norm[[gr]][[line]][["qtrly"]] <- covid_bs[[gr]][[line]][["qtrly"]] / (
      covid_bs[[gr]][["bus_eq"]][["qtrly"]] + 
        covid_bs[[gr]][["other_fin"]][["qtrly"]] + 
        covid_bs[[gr]][["non_fin"]][["qtrly"]] #+
        #covid_bs[[gr]][["liquid_assets"]][["qtrly"]] +
        #covid_bs[[gr]][["debt_sec"]][["qtrly"]]
        )
  }
}




# Normalized balance sheet (avg over time) --------------------------------

dfa_balance_share_norm <- dfa_levels_bynw %>%
  mutate(date = as.yearqtr(date)) %>%
  dplyr::group_by(cat) %>%
  # take average across all quarters in the sample 
  dplyr::summarise_if(is.numeric, mean) %>%
  # create balance sheet categories
  mutate(morts = -morts, other_liab = -liab - morts, 
         other_fin = fin - corp_equ_mfs - non_corp_eq, non_fin = non_fin, 
         bus_eq = corp_equ_mfs + non_corp_eq) %>% 
  select(cat,bus_eq,other_fin,non_fin,morts,other_liab,networth) %>%
  # gather(key = class, value = value, -c(cat)) %>%
  dplyr::mutate(dplyr::across(.cols = all_of(c("bus_eq", "other_fin", "non_fin", "morts", "other_liab")),
                .fns = list(share = ~./(bus_eq + other_fin + non_fin)),
                .names = "{col}_{fn}")) %>%
  select(cat, ends_with("_share")) %>%
  pivot_longer(ends_with("_share"), names_to = "asset") %>% 
  mutate(asset = factor(asset, levels = c("other_fin_share", "non_fin_share", "bus_eq_share", "other_liab_share", "morts_share"))) %>%
  arrange(asset) %>%
  pivot_wider(names_from = cat) %>%
  mutate(asset = as.character(asset))


# Asset charts ------------------------------------------------------------

asset_classes <- c("pension", "real_estate", "corp_equ_mfs", "non_corp_eq", "morts", "cons_cred")

asset_levels_bynw <- select(dfa_levels_bynw, date, cat, all_of(asset_classes)) %>%
  pivot_longer(where(is.numeric), names_to = "asset", values_to = "level") %>% 
  mutate(asset = factor(asset, levels = asset_classes)) %>% 
  dplyr::group_split(asset) %>% 
  lapply(
    function(df) df %>% 
      pivot_wider(id_cols = c(date, asset), names_from = cat, values_from = level) %>% 
      select(date,  rev(groupnames))
  ) 

names(asset_levels_bynw) <- asset_classes

asset_shares_bynw <- select(dfa_shares_bynw, date, cat, all_of(asset_classes)) %>%
  pivot_longer(where(is.numeric), names_to = "asset", values_to = "share") %>% 
  mutate(asset = factor(asset, levels = asset_classes)) %>% 
  group_split(asset) %>% 
  lapply(
    function(df) df %>% 
      pivot_wider(id_cols = c(date, asset), names_from = cat, values_from = share) %>% 
      select(date, rev(groupnames))
  ) 

names(asset_shares_bynw) <- asset_classes




# Recession asset charts --------------------------------------------------

recession_asset_classes <- c("liquid_assets", "housing", "bus_eq", "other_assets", "morts", "other_liab")

recession_levels_bynw <- dfa_levels_bynw %>% 
  dplyr::transmute(date=date, 
            cat=cat,
            liquid_assets = check_curr + time_dep + mmfs,
            housing = real_estate,
            bus_eq = corp_equ_mfs + non_corp_eq,
            other_assets = assets - liquid_assets - real_estate - bus_eq,
            morts = morts,
            other_liab = liab - morts) %>%
  pivot_longer(where(is.numeric), names_to = "asset", values_to = "level") %>% 
  mutate(asset = factor(asset, levels = recession_asset_classes)) %>% 
  dplyr::group_split(asset) %>% 
  lapply(
    function(df) df %>% 
      pivot_wider(id_cols = c(date, asset), names_from = cat, values_from = level) %>% 
      select(date,  rev(groupnames))
  ) 

names(recession_levels_bynw) <- recession_asset_classes

gr_levels_bynw <- recession_levels_bynw %>% 
  lapply(function(df) df %>% 
           filter(date >= as.Date("2007-01-31") & date <= as.Date("2011-01-31")) )

covid_levels_bynw <- recession_levels_bynw %>% 
  lapply(function(df) df %>% 
           filter(date >= as.Date("2017-01-31")) )


# Counterfactual data -----------------------------------------------------

share_diff_ma <- read_csv(file.path(output_directory, "share_diff_ma.csv"))
share_diff_noagg_ma <- read_csv(file.path(output_directory, "share_diff_noagg_ma.csv"))
share_diff_nogrp_ma <- read_csv(file.path(output_directory, "share_diff_nogrp_ma.csv"))

share_diff_counterfactual_ma <- full_join(share_diff_noagg_ma, share_diff_nogrp_ma, by = "date", suffix = c(".noagg", ".nogrp"))
share_diff_counterfactual_ma <- full_join(share_diff_ma, share_diff_counterfactual_ma, by = "date") %>% 
  mutate(date = as.Date(as.yearqtr(date), frac=1))


# Two-asset decomposition -------------------------------------------------

dfa_portfolio_returns_2asset <- read_csv_nomsg(file = file.path(output_directory, "dfa_portfolio_returns_2asset.csv")) %>%
  mutate(date = as.Date(as.yearqtr(date), frac=1))

decomp_2asset <- left_join(dfa_levels_bynw %>% select(date, cat, networth), dfa_portfolio_returns, by = c("date", "cat")) %>%
  left_join(dfa_portfolio_returns_2asset, by = c("date", "cat"), suffix = c("", "_2asset")) %>% 
  dplyr::group_by(date) %>% 
  dplyr::mutate(networth_p = networth,
         networth = sum(networth_p)) %>%
  dplyr::ungroup() %>% 
  dplyr::group_by(cat) %>% 
  dplyr::mutate(networth_share = networth_p / networth,
         E_p = r_p,
         E_p_2asset = r_p_2asset,
         U_p = networth_p / dplyr::lag(networth_p) - 1 - E_p,
         # nw_change = networth_share - lag(networth_share),
         # U_p2 = nw_change * (networth / lag(networth_p)) + (networth / lag(networth) - 1) - E_p, 
         # U_p = U_p1,
         nw_change = networth_p / dplyr::lag(networth_p) - 1
         # nw_change = 100 * nw_change
  ) %>% 
  tidyr::drop_na() %>%
  select(date, cat, E_p, E_p_2asset, U_p, nw_change, networth_p) 

decomp_2asset_ma <- decomp_2asset %>% 
  dplyr::mutate_at(vars(-date, -cat), fourqma) %>% 
  drop_na()

